{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Training a model with `traffic_last_5min` feature\n",
    "\n",
    "\n",
    "## Introduction\n",
    "\n",
    "In this notebook, we'll train a taxifare prediction model but this time with an additional feature of `traffic_last_5min`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "try:\n",
    "    from google.cloud import aiplatform\n",
    "\n",
    "except ImportError:\n",
    "    !pip3 install -U google-cloud-aiplatform --user\n",
    "\n",
    "    print(\"Please restart the kernel and re-run the notebook.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the above command resulted in an installation, please restart the notebook kernel and re-run the notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "\n",
    "from datetime import datetime\n",
    "from matplotlib import pyplot as plt\n",
    "from tensorflow import keras\n",
    "\n",
    "from google.cloud import aiplatform\n",
    "from tensorflow.keras.models import Sequential\n",
    "from tensorflow.keras.layers import Dense, DenseFeatures\n",
    "from tensorflow.keras.callbacks import TensorBoard\n",
    "\n",
    "print(tf.__version__)\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "PROJECT = 'cloud-training-demos' # REPLACE WITH YOUR PROJECT ID\n",
    "BUCKET = 'cloud-training-demos' # REPLACE WITH YOUR BUCKET NAME\n",
    "REGION = 'us-central1' # REPLACE WITH YOUR BUCKET REGION e.g. us-central1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# For Bash Code\n",
    "os.environ['PROJECT'] = PROJECT\n",
    "os.environ['BUCKET'] = BUCKET\n",
    "os.environ['REGION'] = REGION"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "gcloud config set project $PROJECT\n",
    "gcloud config set ai/region $REGION"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!ls -l ../data/taxi-traffic*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!head ../data/taxi-traffic*"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Use tf.data to read the CSV files\n",
    "\n",
    "These functions for reading data from the csv files are similar to what we used in the Introduction to Tensorflow module. Note that here we have an addtional feature `traffic_last_5min`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "CSV_COLUMNS = [\n",
    "    'fare_amount',\n",
    "    'dayofweek',\n",
    "    'hourofday',\n",
    "    'pickup_longitude',\n",
    "    'pickup_latitude',\n",
    "    'dropoff_longitude',\n",
    "    'dropoff_latitude',\n",
    "    'traffic_last_5min'\n",
    "]\n",
    "LABEL_COLUMN = 'fare_amount'\n",
    "DEFAULTS = [[0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0], [0.0]]\n",
    "\n",
    "\n",
    "def features_and_labels(row_data):\n",
    "    label = row_data.pop(LABEL_COLUMN)\n",
    "    features = row_data\n",
    "\n",
    "    return features, label\n",
    "\n",
    "\n",
    "def create_dataset(pattern, batch_size=1, mode=tf.estimator.ModeKeys.EVAL):\n",
    "    dataset = tf.data.experimental.make_csv_dataset(\n",
    "        pattern, batch_size, CSV_COLUMNS, DEFAULTS)\n",
    "\n",
    "    dataset = dataset.map(features_and_labels)\n",
    "\n",
    "    if mode == tf.estimator.ModeKeys.TRAIN:\n",
    "        dataset = dataset.shuffle(buffer_size=1000).repeat()\n",
    "\n",
    "    # take advantage of multi-threading; 1=AUTOTUNE\n",
    "    dataset = dataset.prefetch(1)\n",
    "    return dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "INPUT_COLS = [\n",
    "    'dayofweek',\n",
    "    'hourofday',\n",
    "    'pickup_longitude',\n",
    "    'pickup_latitude',\n",
    "    'dropoff_longitude',\n",
    "    'dropoff_latitude',\n",
    "    'traffic_last_5min'\n",
    "]\n",
    "\n",
    "# Create input layer of feature columns\n",
    "feature_columns = {\n",
    "    colname: tf.feature_column.numeric_column(colname)\n",
    "    for colname in INPUT_COLS\n",
    "    }"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build a simple keras DNN model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a keras DNN model using Sequential API\n",
    "def build_model(dnn_hidden_units):\n",
    "    model = Sequential(DenseFeatures(feature_columns=feature_columns.values()))\n",
    "    \n",
    "    for num_nodes in dnn_hidden_units:\n",
    "        model.add(Dense(units=num_nodes, activation=\"relu\"))\n",
    "    \n",
    "    model.add(Dense(units=1, activation=\"linear\"))    \n",
    "    \n",
    "    # Create a custom evaluation metric\n",
    "    def rmse(y_true, y_pred):\n",
    "        return tf.sqrt(tf.reduce_mean(tf.square(y_pred - y_true)))\n",
    "\n",
    "    # Compile the keras model\n",
    "    model.compile(optimizer=\"adam\", loss=\"mse\", metrics=[rmse, \"mse\"])\n",
    "    \n",
    "    return model"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Next, we can call the `build_model` to create the model. Here we'll have two hidden layers before our final output layer. And we'll train with the same parameters we used before."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "HIDDEN_UNITS = [32, 8]\n",
    "\n",
    "model = build_model(dnn_hidden_units=HIDDEN_UNITS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = 1000\n",
    "NUM_TRAIN_EXAMPLES = 10000 * 6  # training dataset will repeat, wrap around\n",
    "NUM_EVALS = 60  # how many times to evaluate\n",
    "NUM_EVAL_EXAMPLES = 10000  # enough to get a reasonable sample\n",
    "\n",
    "trainds = create_dataset(\n",
    "    pattern='../data/taxi-traffic-train*',\n",
    "    batch_size=BATCH_SIZE,\n",
    "    mode=tf.estimator.ModeKeys.TRAIN)\n",
    "\n",
    "evalds = create_dataset(\n",
    "    pattern='../data/taxi-traffic-valid*',\n",
    "    batch_size=BATCH_SIZE,\n",
    "    mode=tf.estimator.ModeKeys.EVAL).take(NUM_EVAL_EXAMPLES//1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%time\n",
    "steps_per_epoch = NUM_TRAIN_EXAMPLES // (BATCH_SIZE * NUM_EVALS)\n",
    "\n",
    "LOGDIR = \"./taxi_trained\"\n",
    "history = model.fit(x=trainds,\n",
    "                    steps_per_epoch=steps_per_epoch,\n",
    "                    epochs=NUM_EVALS,\n",
    "                    validation_data=evalds,\n",
    "                    callbacks=[TensorBoard(LOGDIR)])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RMSE_COLS = ['rmse', 'val_rmse']\n",
    "\n",
    "pd.DataFrame(history.history)[RMSE_COLS].plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.predict(x={\"dayofweek\": tf.convert_to_tensor([6]),\n",
    "                 \"hourofday\": tf.convert_to_tensor([17]),\n",
    "                 \"pickup_longitude\": tf.convert_to_tensor([-73.982683]),\n",
    "                 \"pickup_latitude\": tf.convert_to_tensor([40.742104]),\n",
    "                 \"dropoff_longitude\": tf.convert_to_tensor([-73.983766]),\n",
    "                 \"dropoff_latitude\": tf.convert_to_tensor([40.755174]),\n",
    "                \"traffic_last_5min\": tf.convert_to_tensor([114])},\n",
    "              steps=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export and deploy model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "OUTPUT_DIR = \"./export/savedmodel\"\n",
    "shutil.rmtree(OUTPUT_DIR, ignore_errors=True)\n",
    "EXPORT_PATH = os.path.join(OUTPUT_DIR,\n",
    "                           datetime.now().strftime(\"%Y%m%d%H%M%S\"))\n",
    "tf.saved_model.save(model, EXPORT_PATH)  # with default serving function\n",
    "os.environ['EXPORT_PATH'] = EXPORT_PATH"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that the last `gcloud` call below, which deploys the mode, can take a few minutes, and you might not see the earlier `echo` outputs while that job is still running. If you want to make sure that your notebook is not stalled and your model is actually getting deployed, view your models in the console at https://console.cloud.google.com/vertex-ai/models, click on your model, and you should see your endpoint listed with an \"in progress\" icon next to it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "TIMESTAMP=$(date -u +%Y%m%d_%H%M%S)\n",
    "MODEL_NAME=taxifare_$TIMESTAMP\n",
    "ENDPOINT_NAME=taxifare_endpoint_$TIMESTAMP\n",
    "IMAGE_URI=\"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-3:latest\"\n",
    "ARTIFACT_DIRECTORY=gs://${BUCKET}/${MODEL_NAME}/\n",
    "echo $ARTIFACT_DIRECTORY\n",
    "\n",
    "gcloud storage cp --recursive ${EXPORT_PATH}/* ${ARTIFACT_DIRECTORY}\n",
    "\n",
    "# Model\n",
    "gcloud ai models upload \\\n",
    "    --region=$REGION \\\n",
    "    --display-name=$MODEL_NAME \\\n",
    "    --container-image-uri=$IMAGE_URI \\\n",
    "    --artifact-uri=$ARTIFACT_DIRECTORY\n",
    "\n",
    "MODEL_ID=$(gcloud ai models list \\\n",
    "    --region=$REGION \\\n",
    "    --filter=display_name=\"$MODEL_NAME\" | cut -d\" \" -f1 | head -n2 | tail -n1)\n",
    "\n",
    "echo \"MODEL_NAME=${MODEL_NAME}\"\n",
    "echo \"MODEL_ID=${MODEL_ID}\"\n",
    "\n",
    "# Endpoint\n",
    "gcloud ai endpoints create \\\n",
    "  --region=$REGION \\\n",
    "  --display-name=$ENDPOINT_NAME\n",
    "\n",
    "ENDPOINT_ID=$(gcloud ai endpoints list \\\n",
    "  --region=$REGION \\\n",
    "  --filter=display_name=\"$ENDPOINT_NAME\" | cut -d\" \" -f1 | head -n2 | tail -n1)\n",
    "\n",
    "echo \"ENDPOINT_NAME=${ENDPOINT_NAME}\"\n",
    "echo \"ENDPOINT_ID=${ENDPOINT_ID}\"\n",
    "\n",
    "# Deployment\n",
    "DEPLOYED_MODEL_NAME=${MODEL_NAME}_deployment\n",
    "MACHINE_TYPE=n1-standard-2\n",
    "MIN_REPLICA_COUNT=1\n",
    "MAX_REPLICA_COUNT=3\n",
    "\n",
    "gcloud ai endpoints deploy-model $ENDPOINT_ID \\\n",
    "  --region=$REGION \\\n",
    "  --model=$MODEL_ID \\\n",
    "  --display-name=$DEPLOYED_MODEL_NAME \\\n",
    "  --machine-type=$MACHINE_TYPE \\\n",
    "  --min-replica-count=$MIN_REPLICA_COUNT \\\n",
    "  --max-replica-count=$MAX_REPLICA_COUNT \\\n",
    "  --traffic-split=0=100"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Take note of the `ENDPOINT_ID` printed above, as you will need it in the next lab."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The above model deployment can be initiated from the Vertex AI Python SDK as well, as seen below. In this case, we do not need to create the Endpoint ourselves (we could though), but it is implicitly created during the `model.deploy()` call."
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "TIMESTAMP=datetime.strftime(datetime.now(), '%Y%m%d_%H%M%S')\n",
    "MODEL_NAME=f\"taxifare_{TIMESTAMP}\"\n",
    "ENDPOINT_NAME=f\"taxifare_endpoint_{TIMESTAMP}\"\n",
    "ARTIFACT_DIRECTORY=f\"gs://{BUCKET}/{MODEL_NAME}/\"\n",
    "print(ARTIFACT_DIRECTORY)\n",
    "IMAGE_URI=\"us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-3:latest\"\n",
    "\n",
    "!gcloud storage cp --recursive $EXPORT_PATH/* $ARTIFACT_DIRECTORY\n",
    "\n",
    "aiplatform.init(project=PROJECT, location=REGION)\n",
    "\n",
    "# Model\n",
    "model = aiplatform.Model.upload(\n",
    "    display_name=MODEL_NAME,\n",
    "    artifact_uri=ARTIFACT_DIRECTORY,\n",
    "    serving_container_image_uri=IMAGE_URI,\n",
    ")\n",
    "\n",
    "model.wait()\n",
    "\n",
    "print(model.display_name)\n",
    "print(model.resource_name)\n",
    "\n",
    "# Deployment\n",
    "DEPLOYED_MODEL_NAME=f\"{MODEL_NAME}_deployment\"\n",
    "MACHINE_TYPE=\"n1-standard-2\"\n",
    "MIN_REPLICA_COUNT=1\n",
    "MAX_REPLICA_COUNT=3\n",
    "\n",
    "model = aiplatform.Model(model_name=model.resource_name)\n",
    "\n",
    "endpoint = model.deploy(\n",
    "    deployed_model_display_name=DEPLOYED_MODEL_NAME,\n",
    "    traffic_split={\"0\": 100},\n",
    "    machine_type=MACHINE_TYPE,\n",
    "    min_replica_count=MIN_REPLICA_COUNT,\n",
    "    max_replica_count=MAX_REPLICA_COUNT,\n",
    ")\n",
    "\n",
    "model.wait()\n",
    "\n",
    "print(endpoint.display_name)\n",
    "print(endpoint.resource_name)\n",
    "print(f\"Model {model.display_name} deployed to Endpoint {endpoint.display_name}.\")\n",
    "print(f\"ENDPOINT_ID={endpoint.resource_name.split('/')[-1]}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright 2021 Google Inc. Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License"
   ]
  }
 ],
 "metadata": {
  "environment": {
   "name": "tf2-gpu.2-3.m71",
   "type": "gcloud",
   "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-3:m71"
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
